suppressMessages(library(Matrix))
suppressMessages(library(SingleCellExperiment))
suppressMessages(library(biomaRt))
suppressMessages(library(scDblFinder))
suppressMessages(library(Seurat))
suppressMessages(library(ggplot2))
suppressMessages(library(scran))
suppressMessages(library(scater))
suppressMessages(library(ggpubr))
suppressMessages(library(sctransform))
srt <- readRDS("/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/integration/data/srt-total-dataset.rds")
sce <- readRDS("/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/integration/data/sce-total.rds")
assays(sce)$logcounts <- NULL
assays(sce)$limma <- NULL
assays(sce)$limma_dataset <- NULL
df <- readRDS("/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/integration/data/qc-df.rds")
keep_macrophages <- srt$RNA_snn_res.0.1
keep_macrophages <- keep_macrophages %in% c(0,3)
srt <- srt[,keep_macrophages]
sce <- sce[, keep_macrophages]
df <- df[keep_macrophages,]
df
logmat <- log2(counts(sce)+1)
df1 <- data.frame(total_counts = df$library_size,
total_features = df$total_features,
sample = sce$sample,
dataset = sce$dataset,
sorting = sce$sorting,
condition = sce$condition)
df2 <- data.frame(log10_total_counts = log10(df$library_size),
log10_total_features = log10(df$total_features),
dataset = sce$dataset,
sorting = sce$sorting,
sample = sce$sample)
varMatrix <- getVarianceExplained(logmat, variables = df1)
varMatrix[1:5,]
## total_counts total_features sample dataset sorting
## a 7.13906359 7.12053327 9.49776131 9.36763252 9.36786206
## A030001D20Rik 0.25132938 0.18967759 0.04015338 0.01254543 0.01661120
## A030005L19Rik 0.01396117 0.02128989 0.15640516 0.08617376 0.09013159
## A030014E15Rik 0.01116884 0.01044194 0.04226329 0.01235296 0.01819649
## A130010J15Rik 2.87287706 2.76662351 0.46264263 0.41429479 0.42324135
## condition
## a 7.20714815
## A030001D20Rik 0.02399383
## A030005L19Rik 0.08014567
## A030014E15Rik 0.01063593
## A130010J15Rik 0.27353676
plotExplanatoryVariables(
varMatrix,
variables = variables) +
ggtitle("Macrophages after QC") +
theme(text = element_text(size=20)) +
scale_color_manual(values=c( "dodgerblue" ,"purple" , "green3","orange", "blue", "red"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 1182 rows containing non-finite values (`stat_density()`).
make_barplot <- function(df, x, y, title, ylab){
ggplot(df, aes(x=x, y=y, fill=x)) +
geom_violin(show.legend = FALSE) +
theme_bw() +
theme(text = element_text(size=20), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab(ylab) + xlab("Sample") +
scale_fill_manual(values=sample_colors) +
ggtitle(title)
}
sample_colors <- c("blue", "green", "orange", "yellow3", "pink", "red", "dodgerblue", "coral1",
"purple", "magenta", "cyan", "grey")
make_barplot(df1, df1$sample, df1$total_counts, "All samples after QC", "Library size")
make_barplot(df2, df2$sample, df2$log10_total_counts, "All samples after QC", "Library size (log10)")
make_barplot(df1, df1$sample, df1$total_features, "All samples after QC", "Total features")
make_barplot(df2, df2$sample, df2$log10_total_features, "All samples after QC", "Total features (log10)")
make_barplot(df2, df2$dataset, df2$log10_total_counts, "All samples after QC", "Library size (log10)")
make_barplot(df2, df2$dataset, df2$log10_total_features, "All samples after QC", "Total features (log10)")
make_barplot(df2, df2$sorting, df2$log10_total_counts, "All samples after QC", "Library size (log10)")
make_barplot(df2, df2$sorting, df2$log10_total_features, "All samples after QC", "Total features (log10)")
Remove noise:
keep_genes <- rowSums(counts(sce)) > 10
sce <- sce[keep_genes,]
qclust <- quickCluster(sce)
sce <- computeSumFactors(sce, clusters=qclust)
summary(sizeFactors(sce))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1216 0.4972 0.6738 1.0000 1.1161 15.6796
sce <- logNormCounts(sce,
size_factors = sizeFactors(sce),
log = TRUE,
pseudo_count = 1,
exprs_values = "counts"
)
mat <- assays(sce)$logcounts
total_counts <- colSums((2**mat)-1)
total_counts[1:5]
## AAACCCAAGTGGAAGA-1_mct1 AAACCCACAGTGTGGA-1_mct1 AAACCCAGTTGGTACT-1_mct1
## 14192.04 14148.52 10632.85
## AAACGAAAGATCACTC-1_mct1 AAACGAACACGGATCC-1_mct1
## 15242.36 14627.17
df3 <- data.frame(total_counts = total_counts,
total_features = df$total_features,
sample = sce$sample,
sorting = sce$sorting,
dataset = sce$dataset,
condition = sce$condition)
df4 <- data.frame(log10_total_counts = log10(total_counts),
log10_total_features = log10(df$total_features),
sample = sce$sample,
dataset = sce$dataset,
sorting = sce$sorting)
make_barplot(df3, df3$sample, df3$total_counts, "All samples", "Library size")
make_barplot(df4, df4$sample, df4$log10_total_counts, "All samples", "Library size (log10)") +ylim(3.5,5)
make_barplot(df3, df3$sorting, df3$total_counts, "All samples", "Library size")
make_barplot(df4, df4$sorting, df4$log10_total_counts, "All samples", "Library size (log10)") +ylim(3.5,5)
make_barplot(df3, df3$dataset, df3$total_counts, "All samples", "Library size")
make_barplot(df4, df4$dataset, df4$log10_total_counts, "All samples", "Library size (log10)") +ylim(3.5,5)
normVarMatrix <- getVarianceExplained(assays(sce)$logcounts, variables = df3[,c("total_counts", "total_features", "sample", "condition", "sorting", "dataset")])
plotExplanatoryVariables(
normVarMatrix,
variables = variables) +
ggtitle("All samples") +
theme(text = element_text(size=20)) +
scale_color_manual(values=c( "green3","purple" ,"orange", "dodgerblue" , "blue", "red"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
#saveRDS(sce, "/mnt/nmorais-nfs/marta/pA_karine/r_session/e12h-preliminary-analysis/macrophage_subset/data/sce-macrophages.rds")
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] sctransform_0.3.5 ggpubr_0.5.0
## [3] scater_1.22.0 scran_1.22.1
## [5] scuttle_1.4.0 ggplot2_3.4.2
## [7] SeuratObject_4.1.3 Seurat_4.1.1
## [9] scDblFinder_1.8.0 biomaRt_2.50.3
## [11] SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0 GenomicRanges_1.46.1
## [15] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [17] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [19] MatrixGenerics_1.6.0 matrixStats_0.63.0
## [21] Matrix_1.5-3
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.4 reticulate_1.26
## [3] tidyselect_1.2.0 RSQLite_2.2.20
## [5] AnnotationDbi_1.56.2 htmlwidgets_1.6.4
## [7] grid_4.1.2 BiocParallel_1.28.3
## [9] Rtsne_0.16 munsell_0.5.0
## [11] ScaledMatrix_1.2.0 codetools_0.2-18
## [13] ica_1.0-3 statmod_1.5.0
## [15] xgboost_1.7.3.1 future_1.31.0
## [17] miniUI_0.1.1.1 withr_3.0.0
## [19] spatstat.random_3.1-3 colorspace_2.1-0
## [21] progressr_0.13.0 filelock_1.0.2
## [23] highr_0.10 knitr_1.45
## [25] rstudioapi_0.15.0 ROCR_1.0-11
## [27] ggsignif_0.6.4 tensor_1.5
## [29] listenv_0.9.0 labeling_0.4.3
## [31] GenomeInfoDbData_1.2.7 polyclip_1.10-4
## [33] farver_2.1.1 bit64_4.0.5
## [35] parallelly_1.34.0 vctrs_0.6.5
## [37] generics_0.1.3 xfun_0.42
## [39] BiocFileCache_2.2.1 R6_2.5.1
## [41] ggbeeswarm_0.7.1 rsvd_1.0.5
## [43] locfit_1.5-9.7 bitops_1.0-7
## [45] spatstat.utils_3.0-1 cachem_1.0.8
## [47] DelayedArray_0.20.0 assertthat_0.2.1
## [49] promises_1.2.1 scales_1.3.0
## [51] beeswarm_0.4.0 gtable_0.3.4
## [53] beachmat_2.10.0 globals_0.16.2
## [55] goftest_1.2-3 rlang_1.1.3
## [57] splines_4.1.2 rstatix_0.7.2
## [59] lazyeval_0.2.2 broom_1.0.5
## [61] spatstat.geom_3.0-6 yaml_2.3.8
## [63] reshape2_1.4.4 abind_1.4-5
## [65] backports_1.4.1 httpuv_1.6.14
## [67] tools_4.1.2 ellipsis_0.3.2
## [69] spatstat.core_2.4-4 jquerylib_0.1.4
## [71] RColorBrewer_1.1-3 ggridges_0.5.6
## [73] Rcpp_1.0.12 plyr_1.8.9
## [75] sparseMatrixStats_1.6.0 progress_1.2.2
## [77] zlibbioc_1.40.0 purrr_1.0.2
## [79] RCurl_1.98-1.10 prettyunits_1.1.1
## [81] rpart_4.1.19 deldir_1.0-6
## [83] pbapply_1.7-0 viridis_0.6.2
## [85] cowplot_1.1.1 zoo_1.8-12
## [87] ggrepel_0.9.5 cluster_2.1.4
## [89] magrittr_2.0.3 data.table_1.15.2
## [91] scattermore_0.8 lmtest_0.9-40
## [93] RANN_2.6.1 fitdistrplus_1.1-8
## [95] hms_1.1.2 patchwork_1.2.0
## [97] mime_0.12 evaluate_0.23
## [99] xtable_1.8-4 XML_3.99-0.16.1
## [101] gridExtra_2.3 compiler_4.1.2
## [103] tibble_3.2.1 KernSmooth_2.23-20
## [105] crayon_1.5.2 htmltools_0.5.7
## [107] mgcv_1.8-41 later_1.3.2
## [109] tidyr_1.3.1 DBI_1.1.3
## [111] dbplyr_2.3.0 MASS_7.3-58.2
## [113] rappdirs_0.3.3 car_3.1-1
## [115] cli_3.6.2 parallel_4.1.2
## [117] metapod_1.2.0 igraph_1.3.5
## [119] pkgconfig_2.0.3 sp_1.6-0
## [121] plotly_4.10.1 spatstat.sparse_3.0-0
## [123] xml2_1.3.3 vipor_0.4.5
## [125] bslib_0.6.1 dqrng_0.3.0
## [127] XVector_0.34.0 stringr_1.5.1
## [129] digest_0.6.34 RcppAnnoy_0.0.20
## [131] spatstat.data_3.0-0 Biostrings_2.62.0
## [133] rmarkdown_2.26 leiden_0.4.3
## [135] uwot_0.1.14 edgeR_3.36.0
## [137] DelayedMatrixStats_1.16.0 curl_5.2.1
## [139] shiny_1.8.0 nlme_3.1-162
## [141] lifecycle_1.0.4 jsonlite_1.8.8
## [143] carData_3.0-5 BiocNeighbors_1.12.0
## [145] viridisLite_0.4.2 limma_3.50.3
## [147] fansi_1.0.6 pillar_1.9.0
## [149] lattice_0.20-45 KEGGREST_1.34.0
## [151] fastmap_1.1.1 httr_1.4.4
## [153] survival_3.5-0 glue_1.7.0
## [155] png_0.1-8 bluster_1.4.0
## [157] bit_4.0.5 stringi_1.8.3
## [159] sass_0.4.8 blob_1.2.3
## [161] BiocSingular_1.10.0 memoise_2.0.1
## [163] dplyr_1.1.4 irlba_2.3.5.1
## [165] future.apply_1.10.0